/*-------------------------------------------------------------------------
 *
 * xfunc.c--
 *    Utility routines to handle expensive function optimization.
 *    Includes xfunc_trypullup(), which attempts early pullup of predicates
 *    to allow for maximal pruning.
 *    
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $Header: /usr/local/cvsroot/postgres95/src/backend/optimizer/path/xfunc.c,v 1.2 1996/10/23 07:14:43 bryanh Exp $
 *
 *-------------------------------------------------------------------------
 */
#ifndef WIN32
#include <math.h>	/* for MAXFLOAT on most systems */
#else
#include <float.h>
#define MAXFLOAT DBL_MAX
#endif /* WIN32 */

#include <values.h>	/* for MAXFLOAT on SunOS */
#include <string.h>

#include "postgres.h"
#include "nodes/pg_list.h"
#include "nodes/nodes.h"
#include "nodes/primnodes.h"
#include "nodes/relation.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/syscache.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
#include "utils/syscache.h"
#include "catalog/pg_language.h"
#include "optimizer/xfunc.h"
#include "optimizer/clauses.h"
#include "optimizer/pathnode.h"
#include "optimizer/internal.h"
#include "optimizer/cost.h"
#include "optimizer/keys.h"
#include "optimizer/tlist.h"
#include "lib/lispsort.h"
#include "access/heapam.h"
#include "tcop/dest.h"
#include "storage/buf_internals.h"	/* for NBuffers */
#include "optimizer/tlist.h"  /* for get_expr */

#define ever ; 1 ;

/* local funcs */
static int xfunc_card_unreferenced(Query *queryInfo, 
				   Expr *clause, Relid referenced); */

/*
** xfunc_trypullup --
**    Preliminary pullup of predicates, to allow for maximal pruning.
** Given a relation, check each of its paths and see if you can
** pullup clauses from its inner and outer.
*/

void xfunc_trypullup(Rel rel)
{
    LispValue y;            /* list ptr */
    CInfo maxcinfo;         /* The CInfo to pull up, as calculated by
			       xfunc_shouldpull() */
    JoinPath curpath;       /* current path in list */
    int progress;           /* has progress been made this time through? */
    int clausetype;
    
    do {
	progress = false;  /* no progress yet in this iteration */
	foreach(y, get_pathlist(rel)) {
	    curpath = (JoinPath)lfirst(y);
	    
	    /*
	     ** for each operand, attempt to pullup predicates until first 
	     ** failure.
	     */
	    for(ever) {
		/* No, the following should NOT be '=='  !! */
		if (clausetype = 
		    xfunc_shouldpull((Path)get_innerjoinpath(curpath),
				     curpath, INNER, &maxcinfo)) {
		    
		    xfunc_pullup((Path)get_innerjoinpath(curpath),
				 curpath, maxcinfo, INNER, clausetype);
		    progress = true;
		}else
		    break;
	    }
	    for(ever) {
		
		/* No, the following should NOT be '=='  !! */
		if (clausetype = 
		    xfunc_shouldpull((Path)get_outerjoinpath(curpath), 
				     curpath, OUTER, &maxcinfo)) {
		    
		    xfunc_pullup((Path)get_outerjoinpath(curpath),
				 curpath, maxcinfo, OUTER, clausetype);
		    progress = true;
		}else
		    break;
	    }
	    
	    /* 
	     ** make sure the unpruneable flag bubbles up, i.e.
	     ** if anywhere below us in the path pruneable is false,
	     ** then pruneable should be false here
	     */
	    if (get_pruneable(get_parent(curpath)) &&
		(!get_pruneable(get_parent
				((Path)get_innerjoinpath(curpath))) ||
		 !get_pruneable(get_parent((Path)
					   get_outerjoinpath(curpath))))) {

		set_pruneable(get_parent(curpath),false);
		progress = true;
	    }
	}
    } while(progress);
}

/*
 ** xfunc_shouldpull --
 **    find clause with highest rank, and decide whether to pull it up
 ** from child to parent.  Currently we only pullup secondary join clauses
 ** that are in the pathclauseinfo.  Secondary hash and sort clauses are
 ** left where they are.
 **    If we find an expensive function but decide *not* to pull it up,
 ** we'd better set the unpruneable flag.  -- JMH, 11/11/92
 **
 ** Returns:  0 if nothing left to pullup
 **           XFUNC_LOCPRD if a local predicate is to be pulled up
 **           XFUNC_JOINPRD if a secondary join predicate is to be pulled up
 */
int xfunc_shouldpull(Query* queryInfo,
		     Path childpath,
		     JoinPath parentpath,
		     int whichchild,
		     CInfo *maxcinfopt)  /* Out: pointer to clause to pullup */
{
    LispValue clauselist, tmplist; /* lists of clauses */
    CInfo maxcinfo;		/* clause to pullup */
    LispValue primjoinclause	/* primary join clause */
	= xfunc_primary_join(parentpath);
    Cost tmprank, maxrank = (-1 * MAXFLOAT); /* ranks of clauses */
    Cost joinselec = 0;	/* selectivity of the join predicate */
    Cost joincost = 0;	/* join cost + primjoinclause cost */
    int retval = XFUNC_LOCPRD;
    
    clauselist = get_locclauseinfo(childpath);
    
    if (clauselist != LispNil) {
	/* find local predicate with maximum rank */
	for (tmplist = clauselist,
	     maxcinfo = (CInfo) lfirst(tmplist),
	     maxrank = xfunc_rank(get_clause(maxcinfo));
	     tmplist != LispNil;
	     tmplist = lnext(tmplist)) {
	
 	if ((tmprank = xfunc_rank(get_clause((CInfo)lfirst(tmplist)))) 
	    > maxrank) {
	    maxcinfo = (CInfo) lfirst(tmplist);
	    maxrank = tmprank;
	  }
	}
    }
    
    /* 
     ** If child is a join path, and there are multiple join clauses,
     ** see if any join clause has even higher rank than the highest
     ** local predicate 
     */
    if (is_join(childpath) && xfunc_num_join_clauses((JoinPath)childpath) > 1)
	for (tmplist = get_pathclauseinfo((JoinPath)childpath);
	     tmplist != LispNil;
	     tmplist = lnext(tmplist)) {
	    
    if (tmplist != LispNil &&
	(tmprank = xfunc_rank(get_clause((CInfo) lfirst(tmplist)))) 
	> maxrank) {
	maxcinfo = (CInfo) lfirst(tmplist);
	maxrank = tmprank;
	retval = XFUNC_JOINPRD;
    }
  }    
    if (maxrank == (-1 * MAXFLOAT))	/* no expensive clauses */
	return(0);
    
    /*
     ** Pullup over join if clause is higher rank than join, or if 
     ** join is nested loop and current path is inner child (note that
     ** restrictions on the inner of a nested loop don't buy you anything --
     ** you still have to scan the entire inner relation each time).
     ** Note that the cost of a secondary join clause is only what's
     ** calculated by xfunc_expense(), since the actual joining 
     ** (i.e. the usual path_cost) is paid for by the primary join clause.
     */
    if (primjoinclause != LispNil) {
	joinselec = compute_clause_selec(queryInfo, primjoinclause, LispNil);
	joincost = xfunc_join_expense(parentpath, whichchild);
	
	if (XfuncMode == XFUNC_PULLALL ||
	    (XfuncMode != XFUNC_WAIT &&
	     ((joincost != 0 &&
	       (maxrank = xfunc_rank(get_clause(maxcinfo))) > 
	       ((joinselec - 1.0) / joincost))
	      || (joincost == 0 && joinselec < 1)
	      || (!is_join(childpath) 
		  && (whichchild == INNER)
		  && IsA(parentpath,JoinPath)
		  && !IsA(parentpath,HashPath) 
		  && !IsA(parentpath,MergePath))))) {

	    *maxcinfopt = maxcinfo;
	    return(retval);

	}else if (maxrank != -(MAXFLOAT)) {
	    /* 
	     ** we've left an expensive restriction below a join.  Since
	     ** we may pullup this restriction in predmig.c, we'd best
	     ** set the Rel of this join to be unpruneable
	     */
	    set_pruneable(get_parent(parentpath), false);
	    /* and fall through */
	}
    }
    return(0);
}


/*
 ** xfunc_pullup --
 **    move clause from child pathnode to parent pathnode.   This operation 
 ** makes the child pathnode produce a larger relation than it used to.
 ** This means that we must construct a new Rel just for the childpath,
 ** although this Rel will not be added to the list of Rels to be joined up
 ** in the query; it's merely a parent for the new childpath.
 **    We also have to fix up the path costs of the child and parent.
 **
 ** Now returns a pointer to the new pulled-up CInfo. -- JMH, 11/18/92
 */
CInfo xfunc_pullup(Query* queryInfo,
		   Path childpath,
		   JoinPath parentpath,
		   CInfo cinfo,   /* clause to pull up */
		   int whichchild,/* whether child is INNER or OUTER of join */
		   int clausetype)/* whether clause to pull is join or local */
{
    Path newkid;
    Rel newrel;
    Cost pulled_selec;
    Cost cost;
    CInfo newinfo;
    
    /* remove clause from childpath */
    newkid = (Path)copyObject((Node)childpath);
    if (clausetype == XFUNC_LOCPRD) {
	set_locclauseinfo(newkid, 
			  xfunc_LispRemove((LispValue)cinfo, 
					   (List)get_locclauseinfo(newkid)));
    }else {
	set_pathclauseinfo
	    ((JoinPath)newkid,
	     xfunc_LispRemove((LispValue)cinfo, 
			      (List)get_pathclauseinfo((JoinPath)newkid)));
    }
    
    /*
     ** give the new child path its own Rel node that reflects the
     ** lack of the pulled-up predicate
     */
    pulled_selec = compute_clause_selec(queryInfo,
					get_clause(cinfo), LispNil);
    xfunc_copyrel(get_parent(newkid), &newrel);
    set_parent(newkid, newrel);
    set_pathlist(newrel, lcons(newkid, NIL));
    set_unorderedpath(newrel, (PathPtr)newkid);
    set_cheapestpath(newrel, (PathPtr)newkid);
    set_size(newrel, 
	     (Count)((Cost)get_size(get_parent(childpath)) / pulled_selec));
    
    /* 
     ** fix up path cost of newkid.  To do this we subtract away all the
     ** xfunc_costs of childpath, then recompute the xfunc_costs of newkid
     */
    cost = get_path_cost(newkid) - xfunc_get_path_cost(childpath);
    Assert(cost >= 0);
    set_path_cost(newkid, cost);
    cost = get_path_cost(newkid) + xfunc_get_path_cost(newkid);
    set_path_cost(newkid, cost);
    
    /*
     ** We copy the cinfo, since it may appear in other plans, and we're going
     ** to munge it.  -- JMH, 7/22/92
     */
    newinfo = (CInfo)copyObject((Node)cinfo);
    
    /* 
     ** Fix all vars in the clause 
     ** to point to the right varno and varattno in parentpath 
     */
    xfunc_fixvars(get_clause(newinfo), newrel, whichchild);
    
    /*  add clause to parentpath, and fix up its cost. */
    set_locclauseinfo(parentpath, 
		      lispCons((LispValue)newinfo, 
			       (LispValue)get_locclauseinfo(parentpath)));
    /* put new childpath into the path tree */
    if (whichchild == INNER) {
	set_innerjoinpath(parentpath, (pathPtr)newkid);
    }else {
	set_outerjoinpath(parentpath, (pathPtr)newkid);
    }
    
    /* 
     ** recompute parentpath cost from scratch -- the cost
     ** of the join method has changed
     */
    cost = xfunc_total_path_cost(parentpath);
    set_path_cost(parentpath, cost);
    
    return(newinfo);
}

/*
 ** calculate (selectivity-1)/cost. 
 */
Cost xfunc_rank(Query *queryInfo,LispValue clause)
{
    Cost selec = compute_clause_selec(queryInfo, clause, LispNil); 
    Cost cost = xfunc_expense(queryInfo,clause);
    
    if (cost == 0) 
	if (selec > 1) return(MAXFLOAT);
	else return(-(MAXFLOAT));
    return((selec - 1)/cost);
}

/*
 ** Find the "global" expense of a clause; i.e. the local expense divided
 ** by the cardinalities of all the base relations of the query that are *not*
 ** referenced in the clause.
 */
Cost xfunc_expense(Query* queryInfo, clause)
     LispValue clause;
{
    Cost cost = xfunc_local_expense(clause);
    
    if (cost)
	{
	    Count card = xfunc_card_unreferenced(queryInfo, clause, LispNil);
	    if (card)
		cost /= card;
	}
    
    return(cost);
}

/*
 ** xfunc_join_expense --
 **    Find global expense of a join clause
 */
Cost xfunc_join_expense(Query *queryInfo, JoinPath path, int whichchild)
{
    LispValue primjoinclause = xfunc_primary_join(path);
    
    /*
     ** the second argument to xfunc_card_unreferenced reflects all the
     ** relations involved in the join clause, i.e. all the relids in the Rel
     ** of the join clause
     */
    Count card = 0;
    Cost cost = xfunc_expense_per_tuple(path, whichchild);
    
    card = xfunc_card_unreferenced(queryInfo, 
				   primjoinclause, 
				   get_relids(get_parent(path)));
    if (primjoinclause)
	cost += xfunc_local_expense(primjoinclause);
    
    if (card) cost /= card;
    
    return(cost);
}

/*
 ** Recursively find the per-tuple expense of a clause.  See
 ** xfunc_func_expense for more discussion.
 */
Cost xfunc_local_expense(LispValue clause)
{
    Cost cost = 0;   /* running expense */
    LispValue tmpclause;
    
    /* First handle the base case */
    if (IsA(clause,Const) || IsA(clause,Var) || IsA(clause,Param)) 
	return(0);
    /* now other stuff */
    else if (IsA(clause,Iter))
	/* Too low. Should multiply by the expected number of iterations. */
	return(xfunc_local_expense(get_iterexpr((Iter)clause)));
    else if (IsA(clause,ArrayRef))
	return(xfunc_local_expense(get_refexpr((ArrayRef)clause)));
    else if (fast_is_clause(clause)) 
	return(xfunc_func_expense((LispValue)get_op(clause), 
				  (LispValue)get_opargs(clause)));
    else if (fast_is_funcclause(clause))
	return(xfunc_func_expense((LispValue)get_function(clause),
				  (LispValue)get_funcargs(clause)));
    else if (fast_not_clause(clause))
	return(xfunc_local_expense(lsecond(clause)));
    else if (fast_or_clause(clause)) {
	/* find cost of evaluating each disjunct */
	for (tmpclause = lnext(clause); tmpclause != LispNil; 
	     tmpclause = lnext(tmpclause))
	    cost += xfunc_local_expense(lfirst(tmpclause));
	return(cost);
    }else {
	elog(WARN, "Clause node of undetermined type");
	return(-1);
    }
}

/*
 ** xfunc_func_expense --
 **    given a Func or Oper and its args, find its expense.
 ** Note: in Stonebraker's SIGMOD '91 paper, he uses a more complicated metric
 ** than the one here.  We can ignore the expected number of tuples for
 ** our calculations; we just need the per-tuple expense.  But he also
 ** proposes components to take into account the costs of accessing disk and
 ** archive.  We didn't adopt that scheme here; eventually the vacuum
 ** cleaner should be able to tell us what percentage of bytes to find on
 ** which storage level, and that should be multiplied in appropriately
 ** in the cost function below.  Right now we don't model the cost of
 ** accessing secondary or tertiary storage, since we don't have sufficient
 ** stats to do it right.
 */
Cost xfunc_func_expense(LispValue node, LispValue args)
{
    HeapTuple tupl;    /* the pg_proc tuple for each function */
    Form_pg_proc proc; /* a data structure to hold the pg_proc tuple */
    int width = 0;     /* byte width of the field referenced by each clause */
    RegProcedure funcid;    /* ID of function associate with node */
    Cost cost = 0;   /* running expense */
    LispValue tmpclause;
    LispValue operand;	/* one operand of an operator */
    
    if (IsA(node,Oper)) {
	/* don't trust the opid in the Oper node.  Use the opno. */
	if (!(funcid = get_opcode(get_opno((Oper)node))))
	    elog(WARN, "Oper's function is undefined");
    }else {
	funcid = get_funcid((Func)node);
    }
    
    /* look up tuple in cache */
    tupl = SearchSysCacheTuple(PROOID, ObjectIdGetDatum(funcid),0,0,0);
    if (!HeapTupleIsValid(tupl))
	elog(WARN, "Cache lookup failed for procedure %d", funcid);
    proc = (Form_pg_proc) GETSTRUCT(tupl);
    
    /* 
     ** if it's a Postquel function, its cost is stored in the
     ** associated plan.
     */
    if (proc->prolang == SQLlanguageId) {
	LispValue tmpplan;
	List planlist;
	
	if (IsA(node,Oper) || get_func_planlist((Func)node) == LispNil) {
	    Oid *argOidVect; /* vector of argtypes */
	    char *pq_src;	/* text of PQ function */
	    int nargs;	/* num args to PQ function */
	    QueryTreeList *queryTree_list;	/* dummy variable */
	    
	    /* 
	     ** plan the function, storing it in the Func node for later 
	     ** use by the executor.  
	     */
	    pq_src = (char *) textout(&(proc->prosrc));
	    nargs = proc->pronargs;
	    if (nargs > 0)
		argOidVect = proc->proargtypes;
	    planlist = (List)pg_plan(pq_src, argOidVect, nargs, 
				     &parseTree_list, None);
	    if (IsA(node,Func))
		set_func_planlist((Func)node, planlist);
	    
	}else {/* plan has been cached inside the Func node already */
	    planlist = get_func_planlist((Func)node);
	}
	
	/*
	 ** Return the sum of the costs of the plans (the PQ function
	 ** may have many queries in its body).
	 */
	foreach(tmpplan, planlist)
	    cost += get_cost((Plan)lfirst(tmpplan));
	return(cost);
    }else {		/* it's a C function */
	/*
	 **  find the cost of evaluating the function's arguments
	 **  and the width of the operands
	 */
	for (tmpclause = args; tmpclause != LispNil; 
	     tmpclause = lnext(tmpclause)) {
	    
	    if ((operand = lfirst(tmpclause)) != LispNil) {
		cost += xfunc_local_expense(operand);
		width += xfunc_width(operand);
	    }
	}
	
	/* 
	 ** when stats become available, add in cost of accessing secondary
	 ** and tertiary storage here.
	 */
	return(cost +  
	       (Cost)proc->propercall_cpu + 
	       (Cost)proc->properbyte_cpu * (Cost)proc->probyte_pct/100.00 * 
	       (Cost)width
	       /* 
		* Pct_of_obj_in_mem
		DISK_COST * proc->probyte_pct/100.00 * width
		* Pct_of_obj_on_disk +
		ARCH_COST * proc->probyte_pct/100.00 * width
		* Pct_of_obj_on_arch 
		*/
	       );
    }
}

/* 
 ** xfunc_width --
 **    recursively find the width of a expression
 */

int xfunc_width(LispValue clause)
{
    Relation rd;         /* Relation Descriptor */
    HeapTuple tupl;      /* structure to hold a cached tuple */
    TypeTupleForm type;   /* structure to hold a type tuple */
    int retval = 0;
    
    if (IsA(clause,Const)) {
	/* base case: width is the width of this constant */
	retval = get_constlen((Const) clause);
	goto exit;
    }else if (IsA(clause,ArrayRef)) {
	/* base case: width is width of the refelem within the array */
	retval = get_refelemlength((ArrayRef)clause);
	goto exit;
    }else if (IsA(clause,Var)) {
	/* base case: width is width of this attribute */
	tupl = SearchSysCacheTuple(TYPOID, 
				   PointerGetDatum(get_vartype((Var)clause)),
				   0,0,0);
	if (!HeapTupleIsValid(tupl))
	    elog(WARN, "Cache lookup failed for type %d", 
		 get_vartype((Var)clause));
	type = (TypeTupleForm) GETSTRUCT(tupl);
	if (get_varattno((Var)clause) == 0) {
	    /* clause is a tuple.  Get its width */
	    rd = heap_open(type->typrelid);
	    retval = xfunc_tuple_width(rd);
	    heap_close(rd);
	}else {
	    /* attribute is a base type */
	    retval = type->typlen;
	}
	goto exit;
    }else if (IsA(clause,Param)) {
	if (typeid_get_relid(get_paramtype((Param)clause))) {
	    /* Param node returns a tuple.  Find its width */
	    rd = heap_open(typeid_get_relid(get_paramtype((Param)clause)));
	    retval = xfunc_tuple_width(rd);
	    heap_close(rd);
	}else if (get_param_tlist((Param)clause) != LispNil) {
	    /* Param node projects a complex type */
	    Assert(length(get_param_tlist((Param)clause)) == 1); /* sanity */
	    retval = 
		xfunc_width((LispValue)
			    get_expr(lfirst(get_param_tlist((Param)clause))));
	}else {
	    /* Param node returns a base type */
	    retval = tlen(get_id_type(get_paramtype((Param)clause)));
	}
	goto exit;	 
    }else if (IsA(clause,Iter)) {
	/* 
	 ** An Iter returns a setof things, so return the width of a single
	 ** thing.
	 ** Note:  THIS MAY NOT WORK RIGHT WHEN AGGS GET FIXED,
	 ** SINCE AGG FUNCTIONS CHEW ON THE WHOLE SETOF THINGS!!!!
	 **    This whole Iter business is bogus, anyway.
	 */
	retval = xfunc_width(get_iterexpr((Iter)clause));
	goto exit;
    }else if (fast_is_clause(clause)) {
	/*
	 ** get function associated with this Oper, and treat this as 
	 ** a Func 
	 */
	tupl = SearchSysCacheTuple(OPROID, 
				   ObjectIdGetDatum(get_opno((Oper)get_op(clause))),
				   0,0,0);
	if (!HeapTupleIsValid(tupl))
	    elog(WARN, "Cache lookup failed for procedure %d", 
		 get_opno((Oper)get_op(clause)));
	return(xfunc_func_width
	       ((RegProcedure)(((OperatorTupleForm)(GETSTRUCT(tupl)))->oprcode), 
		(LispValue)get_opargs(clause)));
    }else if (fast_is_funcclause(clause)) {
	Func func = (Func)get_function(clause);
	if (get_func_tlist(func) != LispNil) {
	    /* this function has a projection on it.  Get the length
	       of the projected attribute */
	    Assert(length(get_func_tlist(func)) == 1);   /* sanity */
	    retval = 
		xfunc_width((LispValue)
			    get_expr(lfirst(get_func_tlist(func))));
	    goto exit;
	}else {
	    return(xfunc_func_width((RegProcedure)get_funcid(func),
				    (LispValue)get_funcargs(clause)));
	}
    }else {
	elog(WARN, "Clause node of undetermined type");
	return(-1);
    }
    
 exit:
    if (retval == -1)
	retval = VARLEN_DEFAULT;
    return(retval);
}

/*
 ** xfunc_card_unreferenced:
 **   find all relations not referenced in clause, and multiply their 
 ** cardinalities.  Ignore relation of cardinality 0.
 ** User may pass in referenced list, if they know it (useful
 ** for joins).
 */
static Count 
xfunc_card_unreferenced(Query *queryInfo, 
			      LispValue clause, Relid referenced)
{
    Relid unreferenced, allrelids = LispNil;
    LispValue temp;
    
    /* find all relids of base relations referenced in query */
    foreach (temp,queryInfo->base_relation_list_)
	{
	    Assert(lnext(get_relids((Rel)lfirst(temp))) == LispNil);
	    allrelids = lappend(allrelids,
				lfirst(get_relids((Rel)lfirst(temp))));
	}
    
    /* find all relids referenced in query but not in clause */
    if (!referenced)
	referenced = xfunc_find_references(clause);
    unreferenced = set_difference(allrelids, referenced);
    
    return(xfunc_card_product(unreferenced));
}

/*
 ** xfunc_card_product 
 **   multiple together cardinalities of a list relations.
 */
Count xfunc_card_product(Query *queryInfo, Relid relids)
{
    LispValue cinfonode;
    LispValue temp;
    Rel currel;
    Cost tuples;
    Count retval = 0;
    
    foreach(temp,relids) {
	currel = get_rel(lfirst(temp));
	tuples = get_tuples(currel);
	
	if (tuples)  { /* not of cardinality 0 */
	    /* factor in the selectivity of all zero-cost clauses */
	    foreach (cinfonode, get_clauseinfo(currel)) {
		if (!xfunc_expense(queryInfo,get_clause((CInfo)lfirst(cinfonode))))
		    tuples *= 
			compute_clause_selec(queryInfo,
					     get_clause((CInfo)lfirst(cinfonode)),
					     LispNil);
	    }
		    
	    if (retval == 0) retval = tuples;
	    else retval *= tuples;
	}
    }
    if (retval == 0) retval = 1;  /* saves caller from dividing by zero */
    return(retval);
}


/*
 ** xfunc_find_references:
 **   Traverse a clause and find all relids referenced in the clause.
 */
List xfunc_find_references(LispValue clause)
{
    List retval = (List)LispNil;
    LispValue tmpclause;
    
    /* Base cases */
    if (IsA(clause,Var)) 
	return(lispCons(lfirst(get_varid((Var)clause)), LispNil));
    else if (IsA(clause,Const) || IsA(clause,Param))
	return((List)LispNil);
    
    /* recursion */
    else if (IsA(clause,Iter))
	/* Too low. Should multiply by the expected number of iterations. maybe */
	return(xfunc_find_references(get_iterexpr((Iter)clause)));
    else if (IsA(clause,ArrayRef))
	return(xfunc_find_references(get_refexpr((ArrayRef)clause)));
    else if (fast_is_clause(clause)) {
	/* string together result of all operands of Oper */
	for (tmpclause = (LispValue)get_opargs(clause); tmpclause != LispNil; 
	     tmpclause = lnext(tmpclause))
	    retval = nconc(retval, xfunc_find_references(lfirst(tmpclause)));
	return(retval);
    }else if (fast_is_funcclause(clause)) {
	/* string together result of all args of Func */
	for (tmpclause = (LispValue)get_funcargs(clause); 
	     tmpclause != LispNil; 
	     tmpclause = lnext(tmpclause))
	    retval = nconc(retval, xfunc_find_references(lfirst(tmpclause)));
	return(retval);
    }else if (fast_not_clause(clause))
	return(xfunc_find_references(lsecond(clause)));
    else if (fast_or_clause(clause)) {
	/* string together result of all operands of OR */
	for (tmpclause = lnext(clause); tmpclause != LispNil; 
	     tmpclause = lnext(tmpclause))
	    retval = nconc(retval, xfunc_find_references(lfirst(tmpclause)));
	return(retval);
    }else {
	elog(WARN, "Clause node of undetermined type");
	return((List)LispNil);
    }
}

/*
 ** xfunc_primary_join:
 **   Find the primary join clause: for Hash and Merge Joins, this is the
 ** min rank Hash or Merge clause, while for Nested Loop it's the
 ** min rank pathclause
 */
LispValue xfunc_primary_join(JoinPath pathnode)
{
    LispValue joinclauselist = get_pathclauseinfo(pathnode);
    CInfo mincinfo;
    LispValue tmplist;
    LispValue minclause = LispNil;
    Cost minrank, tmprank;
    
    if (IsA(pathnode,MergePath)) 
	{
	    for(tmplist = get_path_mergeclauses((MergePath)pathnode),
		minclause = lfirst(tmplist),
		minrank = xfunc_rank(minclause);
		tmplist != LispNil;
		tmplist = lnext(tmplist))
		if ((tmprank = xfunc_rank(lfirst(tmplist)))
		    < minrank)
		    {
			minrank = tmprank;
			minclause = lfirst(tmplist);
		    }
	    return(minclause);
	}
    else if (IsA(pathnode,HashPath)) 
	{
	    for(tmplist = get_path_hashclauses((HashPath)pathnode),
		minclause = lfirst(tmplist),
		minrank = xfunc_rank(minclause);
		tmplist != LispNil;
		tmplist = lnext(tmplist))
		if ((tmprank = xfunc_rank(lfirst(tmplist)))
		    < minrank)
		    {
			minrank = tmprank;
			minclause = lfirst(tmplist);
		    }
	    return(minclause);
	}
    
    /* if we drop through, it's nested loop join */
    if (joinclauselist == LispNil)
	return(LispNil);
    
    for(tmplist = joinclauselist, mincinfo = (CInfo) lfirst(joinclauselist),
	minrank = xfunc_rank(get_clause((CInfo) lfirst(tmplist)));
	tmplist != LispNil;
	tmplist = lnext(tmplist))
	if ((tmprank = xfunc_rank(get_clause((CInfo) lfirst(tmplist))))
	    < minrank)
	    {
		minrank = tmprank;
		mincinfo = (CInfo) lfirst(tmplist);
	    }
    return((LispValue)get_clause(mincinfo));
}

/*
 ** xfunc_get_path_cost
 **   get the expensive function costs of the path
 */
Cost xfunc_get_path_cost(Query *queryInfo, Path pathnode)
{
    Cost cost = 0;
    LispValue tmplist;
    Cost selec = 1.0;
    
    /* 
     ** first add in the expensive local function costs.
     ** We ensure that the clauses are sorted by rank, so that we
     ** know (via selectivities) the number of tuples that will be checked
     ** by each function.  If we're not doing any optimization of expensive
     ** functions, we don't sort.
     */
    if (XfuncMode != XFUNC_OFF)
	set_locclauseinfo(pathnode, lisp_qsort(get_locclauseinfo(pathnode),
					       xfunc_cinfo_compare));
    for(tmplist = get_locclauseinfo(pathnode), selec = 1.0;
	tmplist != LispNil;
	tmplist = lnext(tmplist))
	{
	    cost += (Cost)(xfunc_local_expense(get_clause((CInfo)lfirst(tmplist)))
			   * (Cost)get_tuples(get_parent(pathnode)) * selec);
	    selec *= compute_clause_selec(queryInfo,
					  get_clause((CInfo)lfirst(tmplist)), 
					  LispNil);
	}
    
    /* 
     ** Now add in any node-specific expensive function costs.
     ** Again, we must ensure that the clauses are sorted by rank.
     */
    if (IsA(pathnode,JoinPath))
	{
	    if (XfuncMode != XFUNC_OFF)
		set_pathclauseinfo((JoinPath)pathnode, lisp_qsort
				   (get_pathclauseinfo((JoinPath)pathnode),
				    xfunc_cinfo_compare));
	    for(tmplist = get_pathclauseinfo((JoinPath)pathnode), selec = 1.0;
		tmplist != LispNil;
		tmplist = lnext(tmplist))
		{
		    cost += (Cost)(xfunc_local_expense(get_clause((CInfo)lfirst(tmplist)))
				   * (Cost)get_tuples(get_parent(pathnode)) * selec);
		    selec *= compute_clause_selec(queryInfo,
						  get_clause((CInfo)lfirst(tmplist)),
						  LispNil);
		}
	}
    if (IsA(pathnode,HashPath))
	{
	    if (XfuncMode != XFUNC_OFF)
		set_path_hashclauses
		    ((HashPath)pathnode, 
		     lisp_qsort(get_path_hashclauses((HashPath)pathnode),
				xfunc_clause_compare));
	    for(tmplist = get_path_hashclauses((HashPath)pathnode), selec = 1.0;
		tmplist != LispNil;
		tmplist = lnext(tmplist))
		{
		    cost += (Cost)(xfunc_local_expense(lfirst(tmplist))
				   * (Cost)get_tuples(get_parent(pathnode)) * selec);
		    selec *= compute_clause_selec(queryInfo, 
						  lfirst(tmplist), LispNil);
		}
	}
    if (IsA(pathnode,MergePath))
	{
	    if (XfuncMode != XFUNC_OFF)
		set_path_mergeclauses
		    ((MergePath)pathnode, 
		     lisp_qsort(get_path_mergeclauses((MergePath)pathnode),
				xfunc_clause_compare));
	    for(tmplist = get_path_mergeclauses((MergePath)pathnode), selec = 1.0;
		tmplist != LispNil;
		tmplist = lnext(tmplist))
		{
		    cost += (Cost)(xfunc_local_expense(lfirst(tmplist))
				   * (Cost)get_tuples(get_parent(pathnode)) * selec);
		    selec *= compute_clause_selec(queryInfo,
						  lfirst(tmplist), LispNil);
		}
	}
    Assert(cost >= 0);
    return(cost);
}

/*
 ** Recalculate the cost of a path node.  This includes the basic cost of the 
 ** node, as well as the cost of its expensive functions.
 ** We need to do this to the parent after pulling a clause from a child into a
 ** parent.  Thus we should only be calling this function on JoinPaths.
 */
Cost xfunc_total_path_cost(JoinPath pathnode)
{
    Cost cost = xfunc_get_path_cost((Path)pathnode);
    
    Assert(IsA(pathnode,JoinPath));
    if (IsA(pathnode,MergePath))
	{
	    MergePath mrgnode = (MergePath)pathnode;
	    cost += cost_mergesort(get_path_cost((Path)get_outerjoinpath(mrgnode)),
				   get_path_cost((Path)get_innerjoinpath(mrgnode)),
				   get_outersortkeys(mrgnode),
				   get_innersortkeys(mrgnode),
				   get_tuples(get_parent((Path)get_outerjoinpath
							 (mrgnode))),
				   get_tuples(get_parent((Path)get_innerjoinpath
							 (mrgnode))),
				   get_width(get_parent((Path)get_outerjoinpath
							(mrgnode))),
				   get_width(get_parent((Path)get_innerjoinpath
							(mrgnode))));
	    Assert(cost >= 0);
	    return(cost);
	}
    else if (IsA(pathnode,HashPath))
	{
	    HashPath hashnode = (HashPath)pathnode;
	    cost += cost_hashjoin(get_path_cost((Path)get_outerjoinpath(hashnode)),
				  get_path_cost((Path)get_innerjoinpath(hashnode)),
				  get_outerhashkeys(hashnode),
				  get_innerhashkeys(hashnode),
				  get_tuples(get_parent((Path)get_outerjoinpath
							(hashnode))),
				  get_tuples(get_parent((Path)get_innerjoinpath
							(hashnode))),
				  get_width(get_parent((Path)get_outerjoinpath
						       (hashnode))),
				  get_width(get_parent((Path)get_innerjoinpath
						       (hashnode))));
	    Assert (cost >= 0);
	    return(cost);
	}
    else /* Nested Loop Join */
	{
	    cost += cost_nestloop(get_path_cost((Path)get_outerjoinpath(pathnode)),
				  get_path_cost((Path)get_innerjoinpath(pathnode)),
				  get_tuples(get_parent((Path)get_outerjoinpath
							(pathnode))),
				  get_tuples(get_parent((Path)get_innerjoinpath
							(pathnode))),
				  get_pages(get_parent((Path)get_outerjoinpath
						       (pathnode))),
				  IsA(get_innerjoinpath(pathnode),IndexPath));
	    Assert(cost >= 0);
	    return(cost);
	}
}


/*
 ** xfunc_expense_per_tuple --
 **    return the expense of the join *per-tuple* of the input relation.
 ** The cost model here is that a join costs
 **     k*card(outer)*card(inner) + l*card(outer) + m*card(inner) + n
 **
 ** We treat the l and m terms by considering them to be like restrictions
 ** constrained to be right under the join.  Thus the cost per inner and
 ** cost per outer of the join is different, reflecting these virtual nodes.
 **
 ** The cost per tuple of outer is k + l/referenced(inner).  Cost per tuple
 ** of inner is k + m/referenced(outer).
 ** The constants k, l, m and n depend on the join method.  Measures here are
 ** based on the costs in costsize.c, with fudging for HashJoin and Sorts to
 ** make it fit our model (the 'q' in HashJoin results in a
 ** card(outer)/card(inner) term, and sorting results in a log term.
 
 */
Cost xfunc_expense_per_tuple(JoinPath joinnode, int whichchild)
{
    Rel outerrel = get_parent((Path)get_outerjoinpath(joinnode));
    Rel innerrel = get_parent((Path)get_innerjoinpath(joinnode));
    Count outerwidth = get_width(outerrel);
    Count outers_per_page = ceil(BLCKSZ/(outerwidth + sizeof(HeapTupleData)));
    
    if (IsA(joinnode,HashPath))
	{
	    if (whichchild == INNER)
		return((1 + _CPU_PAGE_WEIGHT_)*outers_per_page/NBuffers);
	    else 
		return(((1 + _CPU_PAGE_WEIGHT_)*outers_per_page/NBuffers)
		       + _CPU_PAGE_WEIGHT_
		       / xfunc_card_product(get_relids(innerrel)));
	}
    else if (IsA(joinnode,MergePath))
	{
	    /* assumes sort exists, and costs one (I/O + CPU) per tuple */
	    if (whichchild == INNER)
		return((2*_CPU_PAGE_WEIGHT_ + 1)
		       / xfunc_card_product(get_relids(outerrel)));
	    else
		return((2*_CPU_PAGE_WEIGHT_ + 1)
		       / xfunc_card_product(get_relids(innerrel)));
	}
    else /* nestloop */
	{
	    Assert(IsA(joinnode,JoinPath));
	    return(_CPU_PAGE_WEIGHT_);
	}
}

/*
 ** xfunc_fixvars --
 ** After pulling up a clause, we must walk its expression tree, fixing Var 
 ** nodes to point to the correct varno (either INNER or OUTER, depending
 ** on which child the clause was pulled from), and the right varattno in the 
 ** target list of the child's former relation.  If the target list of the
 ** child Rel does not contain the attribute we need, we add it.
 */
void xfunc_fixvars(LispValue clause,  /* clause being pulled up */
		   Rel rel,           /* rel it's being pulled from */
		   int varno)         /* whether rel is INNER or OUTER of join */
{
    LispValue tmpclause;  /* temporary variable */
    TargetEntry *tle;              /* tlist member corresponding to var */
    
    
    if (IsA(clause,Const) || IsA(clause,Param)) return;
    else if (IsA(clause,Var))
	{
	    /* here's the meat */
	    tle = tlistentry_member((Var)clause, get_targetlist(rel));
	    if (tle == LispNil)
		{
		    /* 
		     ** The attribute we need is not in the target list,
		     ** so we have to add it.
		     **
		     */
		    add_tl_element(rel, (Var)clause);
		    tle = tlistentry_member((Var)clause, get_targetlist(rel));
		}
	    set_varno(((Var)clause), varno);
	    set_varattno(((Var)clause), get_resno(get_resdom(get_entry(tle))));
	}
    else if (IsA(clause,Iter))
	xfunc_fixvars(get_iterexpr((Iter)clause), rel, varno);
    else if (fast_is_clause(clause))
	{
	    xfunc_fixvars(lfirst(lnext(clause)), rel, varno);
	    xfunc_fixvars(lfirst(lnext(lnext(clause))), rel, varno);
	}
    else if (fast_is_funcclause(clause))
	for (tmpclause = lnext(clause); tmpclause != LispNil; 
	     tmpclause = lnext(tmpclause))
	    xfunc_fixvars(lfirst(tmpclause), rel, varno);
    else if (fast_not_clause(clause))
	xfunc_fixvars(lsecond(clause), rel, varno);
    else if (fast_or_clause(clause))
	for (tmpclause = lnext(clause); tmpclause != LispNil; 
	     tmpclause = lnext(tmpclause))
	    xfunc_fixvars(lfirst(tmpclause), rel, varno);
    else
	{
	    elog(WARN, "Clause node of undetermined type");
	}
}


/*
 ** Comparison function for lisp_qsort() on a list of CInfo's.
 ** arg1 and arg2 should really be of type (CInfo *).  
 */
int xfunc_cinfo_compare(void *arg1, void *arg2)
{
    CInfo info1 = *(CInfo *) arg1;
    CInfo info2 = *(CInfo *) arg2;
    
    LispValue clause1 = (LispValue) get_clause(info1),
    clause2 = (LispValue) get_clause(info2);
    
    return(xfunc_clause_compare((void *) &clause1, (void *) &clause2));
}

/*
 ** xfunc_clause_compare: comparison function for lisp_qsort() that compares two 
 ** clauses based on expense/(1 - selectivity)
 ** arg1 and arg2 are really pointers to clauses.
 */
int xfunc_clause_compare(void *arg1, void *arg2)
{
    LispValue clause1 = *(LispValue *) arg1;
    LispValue clause2 = *(LispValue *) arg2;
    Cost rank1,             /* total xfunc rank of clause1 */ 
    rank2;             /* total xfunc rank of clause2 */
    
    rank1 = xfunc_rank(clause1);
    rank2 = xfunc_rank(clause2);
    
    if ( rank1 < rank2) 
	return(-1);
    else if (rank1 == rank2)
	return(0);
    else return(1);
}

/*
 ** xfunc_disjunct_sort --
 **   given a list of clauses, for each clause sort the disjuncts by cost
 **   (this assumes the predicates have been converted to Conjunctive NF)
 **   Modifies the clause list!
 */
void xfunc_disjunct_sort(LispValue clause_list)
{
    LispValue temp;
    
    foreach(temp, clause_list)
	if(or_clause(lfirst(temp)))
	    lnext(lfirst(temp)) =
		lisp_qsort(lnext(lfirst(temp)), xfunc_disjunct_compare);
}


/*
 ** xfunc_disjunct_compare: comparison function for qsort() that compares two 
 ** disjuncts based on cost/selec.
 ** arg1 and arg2 are really pointers to disjuncts
 */
int xfunc_disjunct_compare(Query* queryInfo, void *arg1, void *arg2)
{
    LispValue disjunct1 = *(LispValue *) arg1;
    LispValue disjunct2 = *(LispValue *) arg2;
    Cost cost1,  /* total cost of disjunct1 */ 
    cost2,  /* total cost of disjunct2 */
    selec1,
    selec2;
    Cost rank1, rank2;
    
    cost1 = xfunc_expense(queryInfo, disjunct1);
    cost2 = xfunc_expense(queryInfo, disjunct2);
    selec1 = compute_clause_selec(queryInfo,
				  disjunct1, LispNil);
    selec2 = compute_clause_selec(queryInfo,
				  disjunct2, LispNil);
    
    if (selec1 == 0)
	rank1 = MAXFLOAT;
    else if (cost1 == 0)
	rank1 = 0;
    else
	rank1 = cost1/selec1;
    
    if (selec2 == 0)
	rank2 = MAXFLOAT;
    else if (cost2 == 0)
	rank2 = 0;
    else
	rank2 = cost2/selec2;
    
    if ( rank1 < rank2) 
	return(-1);
    else if (rank1 == rank2)
	return(0);
    else return(1);
}

/* ------------------------ UTILITY FUNCTIONS ------------------------------- */
/*
 ** xfunc_func_width --
 **    Given a function OID and operands, find the width of the return value.
 */
int xfunc_func_width(RegProcedure funcid, LispValue args)
{
    Relation rd;         /* Relation Descriptor */
    HeapTuple tupl;      /* structure to hold a cached tuple */
    Form_pg_proc proc;   /* structure to hold the pg_proc tuple */
    TypeTupleForm type;   /* structure to hold the pg_type tuple */
    LispValue tmpclause; 
    int retval;
    
    /* lookup function and find its return type */
    Assert(RegProcedureIsValid(funcid));
    tupl = SearchSysCacheTuple(PROOID, ObjectIdGetDatum(funcid), 0,0,0);
    if (!HeapTupleIsValid(tupl))
	elog(WARN, "Cache lookup failed for procedure %d", funcid);
    proc = (Form_pg_proc) GETSTRUCT(tupl);
    
    /* if function returns a tuple, get the width of that */
    if (typeid_get_relid(proc->prorettype))
	{
	    rd = heap_open(typeid_get_relid(proc->prorettype));
	    retval = xfunc_tuple_width(rd);
	    heap_close(rd);
	    goto exit;
	}
    else /* function returns a base type */
	{
	    tupl = SearchSysCacheTuple(TYPOID,
				       ObjectIdGetDatum(proc->prorettype),
				       0,0,0);
	    if (!HeapTupleIsValid(tupl))
		elog(WARN, "Cache lookup failed for type %d", proc->prorettype);
	    type = (TypeTupleForm) GETSTRUCT(tupl);
	    /* if the type length is known, return that */
	    if (type->typlen != -1)
		{
		    retval = type->typlen;
		    goto exit;
		}
	    else /* estimate the return size */
		{
		    /* find width of the function's arguments */
		    for (tmpclause = args; tmpclause != LispNil; 
			 tmpclause = lnext(tmpclause))
			retval += xfunc_width(lfirst(tmpclause));
		    /* multiply by outin_ratio */
		    retval = (int)(proc->prooutin_ratio/100.0 * retval);
		    goto exit;
		}
	}
 exit:
    return(retval);
}

/*
 ** xfunc_tuple_width --
 **     Return the sum of the lengths of all the attributes of a given relation
 */
int xfunc_tuple_width(Relation rd)
{
    int i;
    int retval = 0;
    TupleDesc tdesc = RelationGetTupleDescriptor(rd);
    
    for (i = 0; i < tdesc->natts; i++)
	{
	    if (tdesc->attrs[i]->attlen != -1)
		retval += tdesc->attrs[i]->attlen;
	    else retval += VARLEN_DEFAULT;
	}
    
    return(retval);
}

/*
 ** xfunc_num_join_clauses --
 **   Find the number of join clauses associated with this join path
 */
int xfunc_num_join_clauses(JoinPath path)
{
    int num = length(get_pathclauseinfo(path));
    if (IsA(path,MergePath))
	return(num + length(get_path_mergeclauses((MergePath)path)));
    else if (IsA(path,HashPath))
	return(num + length(get_path_hashclauses((HashPath)path)));
    else return(num);
}

/*
 ** xfunc_LispRemove --
 **   Just like LispRemove, but it whines if the item to be removed ain't there
 */
LispValue xfunc_LispRemove(LispValue foo, List bar) 
{
    LispValue temp = LispNil;
    LispValue result = LispNil;
    int sanity = false;
    
    for (temp = bar; !null(temp); temp = lnext(temp))
	if (! equal((Node)(foo),(Node)(lfirst(temp))) ) 
	    {
		result = lappend(result,lfirst(temp));
	    }
	else sanity = true; /* found a matching item to remove! */
    
    if (!sanity)
	elog(WARN, "xfunc_LispRemove: didn't find a match!");
    
    return(result);
}

#define Node_Copy(a, b, c, d) \
    if (NodeCopy((Node)((a)->d), (Node*)&((b)->d), c) != true) { \
							return false; \
							} 

/*
 ** xfunc_copyrel --
 **   Just like _copyRel, but doesn't copy the paths
 */
bool xfunc_copyrel(Rel from, Rel *to)
{
    Rel	newnode;
    Pointer (*alloc)() = palloc;
    
    /*  COPY_CHECKARGS() */
    if (to == NULL) 
	{ 
	    return false;
	} 
    
    /* COPY_CHECKNULL() */
    if (from == NULL) 
	{
	    (*to) = NULL;
	    return true;
	} 
    
    /* COPY_NEW(c) */
    newnode =  (Rel)(*alloc)(classSize(Rel));
    if (newnode == NULL) 
	{ 
	    return false;
	} 
    
    /* ----------------
     *	copy node superclass fields
     * ----------------
     */
    CopyNodeFields((Node)from, (Node)newnode, alloc);
    
    /* ----------------
     *	copy remainder of node
     * ----------------
     */
    Node_Copy(from, newnode, alloc, relids);
    
    newnode->indexed = from->indexed;
    newnode->pages =   from->pages;
    newnode->tuples =  from->tuples;
    newnode->size =    from->size;
    newnode->width =   from->width;
    
    Node_Copy(from, newnode, alloc, targetlist);
    /* No!!!!    Node_Copy(from, newnode, alloc, pathlist);  
       Node_Copy(from, newnode, alloc, unorderedpath);
       Node_Copy(from, newnode, alloc, cheapestpath);  */
#if 0	/* can't use Node_copy now. 2/95 -ay */
    Node_Copy(from, newnode, alloc, classlist);
    Node_Copy(from, newnode, alloc, indexkeys);
    Node_Copy(from, newnode, alloc, ordering);
#endif
    Node_Copy(from, newnode, alloc, clauseinfo);
    Node_Copy(from, newnode, alloc, joininfo);
    Node_Copy(from, newnode, alloc, innerjoin);
    Node_Copy(from, newnode, alloc, superrels);
    
    (*to) = newnode;
    return true;
}
